Lorenzo Mattioli
  • Home
  • Blog
  • Projects
  • Publications
  • About

On this page

  • Introduction
  • Mapping inequality
  • Ranking Italian regions
  • Investigating causes of poverty through LPM modeling

Workshop - analyse inequality

  • Show All Code
  • Hide All Code

  • View Source
Teaching
Income inequality
Umbria
Data visualisation
Interactive
Author

Lorenzo Mattioli

Published

13 aprile 2026

Introduction

Show code
packages <- c('ggbump','tidyverse','laeken','MetBrewer','sf','ggiraph')
install.packages(setdiff(packages, rownames(installed.packages()))) 
rm(packages)
library(ggbump)
library(tidyverse)
library(laeken)
library(MetBrewer)
library(sf)
library(ggiraph)
# import pdata
pdata <- readRDS('SHIWpdata.rds')
# Inequality indexes -----------------------------------------------------------
## Gini - Income ----
### Over regions by year
as_tibble(gini(pdata$eqhincome,
     weights = pdata$peso,
     years = pdata$anno,
     breakdown = pdata$ireg,
     na.rm = T
)[['valueByStratum']]) -> giniInc

giniInc$stratum <- gsub(' - ', '-', giniInc$stratum)
### ranking
giniInc |> 
  group_by(year) |> 
  mutate(
    rank = rank(value)
  ) -> giniInc
### rounding off value
giniInc$value <- round(giniInc$value, 2)

### ranking5
giniInc$rank5 <- case_match(giniInc$rank,
                            1:4 ~ 1,
                            5:8 ~ 2,
                            9:12 ~ 3,
                            13:16 ~ 4,
                            17:20 ~ 5
                            )
### Total by year
as_tibble_col(gini(pdata$eqhincome,
     weights = pdata$peso,
     years = pdata$anno,
     na.rm = T
)[["value"]]) -> giniIncTot

giniIncTot$stratum <- 'Italia'
giniIncTot$year <- c(2000, 2002, 2004, 2008, 2010, 2012, 2014, 2016, 2020)
giniIncTot$rank <- NA

giniInc <- rbind(giniInc, giniIncTot)
rm(giniIncTot)


## Gini - wealth ----
### Over regions by year
as_tibble(gini(pdata$pcwealth,
               weights = pdata$peso,
               years = pdata$anno,
               breakdown = pdata$ireg,
               na.rm = T
)[['valueByStratum']]) -> giniW

giniW$stratum <- gsub(' - ', '-', giniW$stratum)

### ranking
giniW |> 
  group_by(year) |> 
  mutate(
    rank = rank(value)
  ) -> giniW
### ranking5
giniW$rank5 <- case_match(giniW$rank,
                            1:4 ~ 1,
                            5:8 ~ 2,
                            9:12 ~ 3,
                            13:16 ~ 4,
                            17:20 ~ 5
)

### rounding off value
giniW$value <- round(giniW$value, 2)

### Total by year
as_tibble_col(gini(pdata$pcwealth,
                   weights = pdata$peso,
                   years = pdata$anno,
                   na.rm = T
)[["value"]]) -> giniWTot

giniWTot$stratum <- 'Italia'
giniWTot$year <- c(2000, 2002, 2004, 2008, 2010, 2012, 2014, 2016, 2020)
giniWTot$rank <- NA

giniW <- rbind(giniW, giniWTot)
rm(giniWTot)



# Poverty  ---------------------------------------------------------------------

## Head count ----
### Over region by year
pdata |> 
  group_by(anno, ireg) |> 
  count(pov) -> pov
pdata |> 
  group_by(anno, ireg) |> 
  count(!pov) -> pov$'!pov'
pov |> 
  mutate('!pov' = `!pov`$n) |> 
  filter(pov == 1) |> 
  mutate(pov = n) |> 
  select(!n) -> pov

pov |> 
  mutate(hCount = pov/(sum(pov, `!pov`))) -> pov

### Ranking
pov |> 
  group_by(anno) |> 
  mutate(
    rankHCount = 21-rank(hCount)
  ) -> pov

## Poverty intensity ----
### povLine
pdata |> 
  group_by(anno) |> 
  summarise(povLine = weightedMedian(eqhincome,
                                     weights = peso)*0.6) -> povLine
pov <- left_join(pov, povLine)
rm(povLine)

### Poverty gap
avPoor <- pdata |>
  filter(pov == 1) |> 
  group_by(anno, ireg) |>
  summarise(avPoor = weightedMean(eqhincome, weights = peso))
pov <- left_join(pov, avPoor)
rm(avPoor)
pov$povGapIndex <- pov$hCount * (pov$povLine - pov$avPoor)/pov$povLine

### Ranking
pov |> 
  group_by(anno) |> 
  mutate(
    rankPovGap = 21-rank(povGapIndex)
  ) -> pov

## LPM risk of being in poverty ----
pdata <- within(pdata, cfedu <- relevel(factor(cfedu), ref = 'Specializzazione post-laurea'))
pdata <- within(pdata, cfsex <- relevel(factor(cfsex), ref = 'Maschile'))
pdataReg <- pdata |> 
  filter(anno == 2020)

### Regression models
lpm0 <- lm(pov ~ factor(cfedu),
   weights = pesopop,
   data = pdataReg)
lpm1 <- lm(pov ~ factor(cfedu) + factor(cfsex) + factor(cfclass),
           weights = pesopop,
           data = pdataReg)

### Harvesting results
lpm0results <- as_tibble(summary(lpm0)[["coefficients"]])
lpm0results <- cbind(lpm0results, confint(lpm0, level=0.95)) #adding CIs

lpm1results <- as_tibble(summary(lpm1)[["coefficients"]])
lpm1results <- cbind(lpm1results, confint(lpm1, level=0.95)) #adding CIs
lpm0results <- lpm0results %>% setNames(paste0('0.', names(.)))

### Cleaning results
lpm0results <- lpm0results |> 
  rownames_to_column(var = 'reg')
lpm1results <- lpm1results |> 
  rownames_to_column(var = 'reg')

lpmresults <- full_join(lpm0results, lpm1results)
rm(lpm0results, lpm1results, lpm0, lpm1)

lpmresults['reg'][lpmresults['reg'] == '(Intercept)'] <- ')Intercept'

lpmresults <- lpmresults |> 
  mutate(reg = str_split_fixed(reg, fixed(')'), n = Inf)) 

lpmresults <- within(lpmresults, reg <- reg[,2])

lpmresults$'0.star' <- ifelse(lpmresults$`0.Pr(>|t|)` <= 0.001, '***',
                            ifelse(lpmresults$`0.Pr(>|t|)` <= 0.01, '**',
                                   ifelse(lpmresults$`0.Pr(>|t|)` <= 0.05, '*',
                                          ifelse(lpmresults$`0.Pr(>|t|)` <= 0.1, '.', ''))))

lpmresults$star<- ifelse(lpmresults$`Pr(>|t|)` <= 0.001, '***',
                              ifelse(lpmresults$`Pr(>|t|)` <= 0.01, '**',
                                     ifelse(lpmresults$`Pr(>|t|)` <= 0.05, '*',
                                            ifelse(lpmresults$`Pr(>|t|)` <= 0.1, '.', ''))))

lpmresults <- lpmresults |> 
  relocate('0.star', .before = Estimate)

lpmresults$vars <- c('Intercetta', 'Titolo di studio', 'Titolo di studio', 'Titolo di studio', 'Titolo di studio', 'Titolo di studio', 'Sesso', 'Settore economico dell\'occupazione principale', 'Settore economico dell\'occupazione principale', 'Settore economico dell\'occupazione principale', 'Settore economico dell\'occupazione principale', 'Settore economico dell\'occupazione principale', 'Settore economico dell\'occupazione principale', 'Settore economico dell\'occupazione principale', 'Settore economico dell\'occupazione principale', 'Settore economico dell\'occupazione principale')

Ad Aprile 2026 ho tenuto un workshop di metodi della ricerca sociale nel campo delle disuguaglianze economiche.

Di seguito i link al notebook con i codici sorgente:

Notebook

Qui sotto una selezione delle visualizzazioni (tradotte in inglese)

Mapping inequality

Show code
##### import sf
regMap <- readRDS('regMap.rds')

##### merge Gini tibbles with sf data
incMap <- left_join(giniInc, regMap, by = join_by(stratum == DEN_REG))

##### ggiraph ready map
incGiniGG <- incMap |> 
  filter(year == 2000 | year == 2010 |year == 2020) |> 
  drop_na(rank) |> 
  ggplot() +
  geom_sf_interactive(aes(geometry = geometry, fill = value, data_id = stratum, tooltip = value), colour = 'black') +
  facet_wrap(vars(year)) +
  labs(x = NULL, y = NULL,
       title = 'Gini index by region',
       subtitle = 'Equivalent household income',
       caption = 'Data: Bank of Italy. Elaborated by Lorenzo Mattioli - Una Regione per Restare') +
  theme_minimal(base_family = 'Helvetica') +
  scale_fill_viridis_c(direction = -1, limits = c(15, 40), option = 'mako') +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        panel.grid = element_blank(),
        legend.title = element_blank(),
        plot.title = element_text(size = 20,
                                  hjust = .5),
        plot.subtitle = element_text(size = 15,
                                     hjust = .5),
        plot.caption = element_text(size = 10,
                                    hjust = .5))


##### interactive map
girafe(ggobj = incGiniGG,
       width_svg = 13,
       options = list(
         opts_hover(css = ''), ## CSS code of line we're hovering over
         opts_hover_inv(css = "opacity:0.3;"), ## CSS code of all other lines
         opts_tooltip(css = "background-color:white;
                      color:black;
                      font-family:Helvetica;
                      font-style:empty;
                      padding:8px;
                      border-radius:10px;",
                      use_cursor_pos = T),
         opts_toolbar(position = 'bottomright')
       ))
Show code
pov$ireg <- gsub(' - ', '-', pov$ireg)
povMap <- left_join(pov, regMap, by = join_by(ireg == DEN_REG))
##### ggiraph ready map
povMap$hCount <- round(povMap$hCount*100, 2)

povhGG <- povMap |> 
  filter(anno == 2000 | anno == 2010 |anno == 2020) |>
  ggplot() +
  geom_sf_interactive(aes(geometry = geometry, fill = hCount, data_id = ireg, tooltip = hCount), colour = 'black') +
  facet_wrap(vars(anno)) +
  labs(x = NULL, y = NULL,
       title = 'Poverty headcount by region',
       caption = 'Data: Bank of Italy. Elaborated by Lorenzo Mattioli - Una Regione per Restare') +
  theme_minimal(base_family = 'Helvetica') +
  scale_fill_viridis_c(direction = -1, limits = c(0, 51), option = 'inferno') +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        panel.grid = element_blank(),
        legend.title = element_blank(),
        plot.title = element_text(size = 20,
                                  hjust = .5),
        plot.subtitle = element_text(size = 15,
                                     hjust = .5),
        plot.caption = element_text(size = 10,
                                    hjust = .5))


##### interactive map
girafe(ggobj = povhGG,
       width_svg = 13,
       options = list(
         opts_hover(css = ''), ## CSS code of line we're hovering over
         opts_hover_inv(css = "opacity:0.3;"), ## CSS code of all other lines
         opts_tooltip(css = "background-color:white;
                      color:black;
                      font-family:Helvetica;
                      font-style:empty;
                      padding:8px;
                      border-radius:10px;",
                      use_cursor_pos = T),
         opts_toolbar(position = 'bottomright')
       ))

Ranking Italian regions

Show code
# Gini
giniInc |> 
  filter(year == 2000 | year == 2004 | year == 2008 | year == 2012 | year == 2016 | year == 2020) |> 
  drop_na(rank) |> 
  ggplot(aes(x = year, y = rank, data_id = stratum)) +
  geom_bump(linewidth = 0.6, color = 'gray90',
            data = ~. |> filter(stratum != 'Umbria')) +
  geom_bump(aes(colour = stratum), linewidth = 0.8,
            data = ~. |> filter(stratum == 'Umbria' | stratum == 'Lombardia' | stratum == 'Abruzzo')) +
  geom_point(color = 'gray90',
             data = ~. |> filter(stratum != 'Umbria'),
             size = 4) +
  geom_point(aes(colour = stratum),
             data = ~. |> filter(stratum == 'Umbria' | stratum == 'Lombardia' | stratum == 'Abruzzo'),
             size = 4) +
  geom_point(color = 'white', size = 2) +
  geom_text_interactive(aes(label = stratum, group = stratum), colour = 'gray90', x = 2021, hjust = 0, size = 3.5, family = 'Helvetica',
                        data = ~. |> filter(year == 2020)) +
  geom_text(aes(label = stratum, group = stratum), colour = 'black', x = 2021, hjust = 0,, size = 3.5, family = 'Helvetica',
            data = ~. |> filter(year == 2020 & stratum == 'Umbria' | year == 2020 & stratum == 'Lombardia' | year == 2020 & stratum == 'Abruzzo')) +
  scale_color_viridis_d(option = 'mako', end = .6) +
  scale_x_continuous(limits = c(2000, 2024) ,expand = c(0.01, 0), breaks=c(2000, 2004, 2008, 2012, 2016, 2020)) +
  scale_y_reverse(expand = c(0.02, 0), breaks = c(5, 10, 15, 20)) +
  labs(x = NULL, y = NULL,
       title = 'Ranking Italian regions by Gini index',
       subtitle = 'Equivalent household income',
       caption = 'Data: Bank of Italy. Elaborated by Lorenzo Mattioli - Una Regione per Restare') +
  theme_minimal(base_family = 'Helvetica') +
  theme(
    legend.position = 'none',
    panel.grid = element_blank(),
    plot.title = element_text(size = 20,
                              hjust = .5),
    plot.subtitle = element_text(size = 15,
                                 hjust = .5),
    plot.caption = element_text(size = 10,
                                hjust = .5)
  )
# Poverty
pov |> 
  filter(anno == 2000 | anno == 2004 | anno == 2008 | anno == 2012 | anno == 2016 | anno == 2020) |> 
  ggplot(aes(x = anno, y = rankHCount, group = ireg, data_id = ireg)) +
    geom_bump(linewidth = 0.6, color = "gray90", smooth = 6) +
    geom_bump(aes(colour = ireg), linewidth = 0.8, smooth = 6,
                data = ~. |> filter(ireg == 'Umbria' | ireg == 'Lombardia' | ireg == 'Abruzzo')) +
    geom_point(color = "gray90", size = 4) +
    geom_point(aes(colour = ireg),
               data = ~. |> filter(ireg == 'Umbria' | ireg == 'Lombardia' | ireg == 'Abruzzo'),
               size = 4) +
    geom_point(color = 'white', size = 2) +
    geom_text(aes(label = ireg, group = ireg), colour = 'gray90', x = 2021, hjust = 0, size = 3.5, family = 'Helvetica',
              data = ~. |> filter(anno == 2020)) +
    geom_text(aes(label = ireg, group = ireg), colour = 'black', x = 2021, hjust = 0,, size = 3.5, family = 'Helvetica',
              data = ~. |> filter(anno == 2020 & ireg == 'Umbria' | anno == 2020 & ireg == 'Lombardia' | anno == 2020 & ireg == 'Abruzzo')) +
    scale_color_manual(values = met.brewer('Degas')) +
    scale_x_continuous(limits = c(2000, 2024) ,expand = c(0.01, 0), breaks=c(2000, 2010, 2020)) +
    scale_y_reverse(expand = c(0.02, 0), breaks = c(1, 5, 10, 15, 20)) +
    labs(x = NULL, y = NULL,
         title = 'Ranking Italian regions by poverty rate',
         subtitle = 'Headcount, from poorest to least poor',
         caption = 'Data: Bank of Italy. Elaborated by Lorenzo Mattioli - Una Regione per Restare') +
    theme_minimal(base_family = 'Helvetica') +
    theme(
      legend.position = 'none',
      panel.grid = element_blank(),
      plot.title = element_text(size = 20,
                           hjust = .5),
      plot.subtitle = element_text(size = 15,
                                   hjust = .5),
      plot.caption = element_text(size = 10,
                                  hjust = .5)
    )

Investigating causes of poverty through LPM modeling

Show code
lpmresults$vars <- factor(lpmresults$vars, levels=unique(lpmresults$vars))
lpmresults$roundEst <- round(lpmresults$Estimate, 2)

gglpm <- lpmresults |> 
  filter(vars == 'Titolo di studio') |> 
  mutate(reg = factor(reg, levels = c('Laurea', 'Medie superiori', 'Medie inferiori', 'Licenza elementare', 'Nessuno'))) |> 
  ggplot(aes(x=Estimate, y=reg, colour = vars, data_id = roundEst, tooltip = roundEst)) +
  geom_vline(xintercept = 0,
             linewidth = 0.4,
             color = 'gray70') +
  geom_linerange_interactive(aes(xmin=`2.5 %`,xmax=`97.5 %`), linewidth = .6) +
  geom_point_interactive(size = 6) +
  geom_point(colour = 'white', size = 3) +
  geom_point_interactive(aes(x = `2.5 %`), shape = '|', size = 6) +
  geom_point_interactive(aes(x = `97.5 %`), shape = '|', size = 6) +
  labs(x = NULL, y = NULL,
       title = 'Probability of relative poverty based on head of household\'s educational attainment',
       subtitle = 'Reference category: post-lauream specialisation',
       caption = 'Data: Bank of Italy. Elaborated by Lorenzo Mattioli - Una Regione per Restare') +
  scale_color_manual(values = met.brewer('Degas')) +
  theme_minimal(base_family = 'Helvetica') +
  theme(panel.grid = element_line(),
        legend.position = 'none',
        axis.text = element_text(size = 13),
        plot.title = element_text(size = 20,
                                  hjust = 0),
        plot.subtitle = element_text(size = 15,
                                     hjust = 0),
        plot.caption = element_text(size = 10,
                                    hjust = .5))

girafe(ggobj = gglpm,
       width_svg = 13,
       options = list(
         opts_hover(css = ''), ## CSS code of line we're hovering over
         opts_hover_inv(css = "opacity:0.3;"), ## CSS code of all other lines
         opts_tooltip(css = "background-color:white;
                      color:black;
                      font-family:Helvetica;
                      font-style:empty;
                      padding:8px;
                      border-radius:10px;",
                      use_cursor_pos = T),
         opts_toolbar(position = 'bottomright')))
 
Cookie Preferences